Recently we have observed that most of our fellow community members at Kaggle are developing very simple and similar graphs using the same old libraries, without any interesting approaches or any kind of interactivity (apart from bouncing labels ;) ). So, we decided to try some new and different kinds of graphs from among the vast number of libraries that are available online for data visualization. Hence we present this Kernel with some interesting graphs - DrillDown Charts, Network Plots & Motion Plots. Constructive criticism will be appreciated. Please upvote our work! Your support will motivate us to try more cool stuff and bring it to the community.
This section deals with the analysis of the most important aspect of Stack Overflow - its community. It shows a detailed analysis of the various metrics that were explored during the EDA. We will take a detailed look at the Country, Education, Non-Degree Education Sources, Occupations, Job Experience, Demographics (such as Gender, Age) and the overall satisfaction of the users with Stack Overflow and its community.
# World choropleth: number of survey respondents per country.
data(worldgeojson, package = "highcharter")

# One row per non-missing country with its respondent count (n1).
by_country <- survey_results_public %>%
  select(Country) %>%
  filter(!is.na(Country)) %>%
  group_by(Country) %>%
  summarise(n1 = n())

# ISO3 codes are the key the map series is joined on (joinBy = "iso3").
code <- countrycode(by_country$Country, "country.name", "iso3c")
by_country$iso3 <- code

p_by_country <- highchart() %>%
  hc_add_series_map(
    worldgeojson,
    by_country,
    value = "n1",
    joinBy = "iso3"
  ) %>%
  hc_colorAxis(stops = color_stops()) %>%
  hc_legend(enabled = TRUE) %>%
  hc_mapNavigation(enabled = TRUE) %>%
  hc_title(text = "Respondent by Country") %>%
  hc_tooltip(
    useHTML = TRUE,
    headerFormat = "",
    pointFormat = "Country: {point.Country} Total Respondent: {point.n1}"
  ) %>%
  hc_add_theme(hc_theme_google())
# World choropleth of non-student ("professional") respondents per country,
# shown next to the all-respondents map in a two-chart grid.
professionals_result <- survey_results_public %>%
  filter(Student == "No")

# One row per non-missing country with its professional count (n2).
professionals_by_country <- professionals_result %>%
  select(Country) %>%
  filter(!is.na(Country)) %>%
  group_by(Country) %>%
  summarise(n2 = n())
code <- countrycode(professionals_by_country$Country, "country.name", "iso3c")
professionals_by_country$iso3 <- code

# Join on Country as well as iso3: joining on iso3 alone duplicates rows
# whenever more than one country name maps to NA (or to the same code),
# because equal keys -- including NA -- match each other.
combined_result <- by_country %>%
  left_join(professionals_by_country, by = c("Country", "iso3")) %>%
  select(iso3, Country, n1, n2)

data(worldgeojson, package = "highcharter")
p_professionals_by_country <- highchart() %>%
  hc_add_series_map(
    worldgeojson,
    combined_result,
    value = "n2",
    joinBy = "iso3"
  ) %>%
  hc_colorAxis(stops = color_stops()) %>%
  hc_legend(enabled = TRUE) %>%
  hc_mapNavigation(enabled = TRUE) %>%
  hc_title(text = "Professionals Respondent by Countries") %>%
  hc_tooltip(
    useHTML = TRUE,
    headerFormat = "",
    pointFormat = "Country: {point.Country} Professionals: {point.n2} Total Respondent: {point.n1}"
  ) %>%
  hc_add_theme(hc_theme_google())

# Render both maps together.
lst <- list(
  p_by_country,
  p_professionals_by_country
)
hw_grid(lst, rowheight = 350)
# Share of respondents per Student answer (percent of non-missing answers).
by_Student <- survey_results_public %>%
  filter(!is.na(Student)) %>%
  group_by(Student) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total)) %>%
  ungroup() %>%
  mutate(
    Student = reorder(Student, Total),
    Percent = round(Total / sum(Total) * 100)
  ) %>%
  head(10)

# Horizontal bar chart, one colour per answer.
highchart() %>%
  hc_xAxis(categories = by_Student[["Student"]]) %>%
  hc_add_series(
    name = "Percent %",
    data = by_Student[["Percent"]],
    colorByPoint = 1
  ) %>%
  hc_title(text = "Are Respondent Student") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())
# Top 10 formal education levels, as percent of non-missing answers.
by_FormalEducation <- survey_results_public %>%
  filter(!is.na(FormalEducation)) %>%
  group_by(FormalEducation) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total)) %>%
  ungroup() %>%
  mutate(
    FormalEducation = reorder(FormalEducation, Total),
    Percent = round(Total / sum(Total) * 100)
  ) %>%
  head(10)

# Chart is stored (not printed) so it can be placed in a grid later.
p_by_FormalEducation <- highchart() %>%
  hc_xAxis(categories = by_FormalEducation[["FormalEducation"]]) %>%
  hc_add_series(
    name = "Percent %",
    data = by_FormalEducation[["Percent"]],
    colorByPoint = 1
  ) %>%
  hc_title(text = "Formal Education of Respondent") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())
# Top 10 undergraduate majors, as percent of non-missing answers.
by_UndergradMajor <- survey_results_public %>%
  filter(!is.na(UndergradMajor)) %>%
  group_by(UndergradMajor) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total)) %>%
  ungroup() %>%
  mutate(
    UndergradMajor = reorder(UndergradMajor, Total),
    Percent = round(Total / sum(Total) * 100)
  ) %>%
  head(10)

p_by_UndergradMajor <- highchart() %>%
  hc_xAxis(categories = by_UndergradMajor[["UndergradMajor"]]) %>%
  hc_add_series(
    name = "Percent %",
    data = by_UndergradMajor[["Percent"]],
    colorByPoint = 1
  ) %>%
  hc_title(text = "Main Field of Study of Respondent") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())

# Show the formal-education and major charts together.
lst <- list(p_by_FormalEducation, p_by_UndergradMajor)
hw_grid(lst, rowheight = 400)
# Top 10 non-degree education types. EducationTypes is a ";"-separated
# multi-select, so it is split into one row per selected option first.
# Percent is relative to the total number of survey rows.
by_EducationTypes <- survey_results_public %>%
  select(Respondent, EducationTypes) %>%
  mutate(EducationTypes = strsplit(as.character(EducationTypes), ";")) %>%
  unnest(EducationTypes) %>%
  filter(!is.na(EducationTypes)) %>%
  group_by(EducationTypes) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total)) %>%
  ungroup() %>%
  mutate(
    EducationTypes = reorder(EducationTypes, Total),
    Percent = round(Total / nrow(survey_results_public) * 100)
  ) %>%
  head(10)

p_by_EducationTypes <- highchart() %>%
  hc_xAxis(categories = by_EducationTypes[["EducationTypes"]]) %>%
  hc_add_series(
    name = "Percent %",
    data = by_EducationTypes[["Percent"]],
    colorByPoint = 1
  ) %>%
  hc_title(text = "Non-Degree Education of Respondent") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())
# Top 10 self-teaching resources. SelfTaughtTypes is a ";"-separated
# multi-select; Percent is relative to the total number of survey rows.
by_SelfTaughtTypes <- survey_results_public %>%
  select(Respondent, SelfTaughtTypes) %>%
  mutate(SelfTaughtTypes = strsplit(as.character(SelfTaughtTypes), ";")) %>%
  unnest(SelfTaughtTypes) %>%
  filter(!is.na(SelfTaughtTypes)) %>%
  group_by(SelfTaughtTypes) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total)) %>%
  ungroup() %>%
  mutate(
    SelfTaughtTypes = reorder(SelfTaughtTypes, Total),
    Percent = round(Total / nrow(survey_results_public) * 100)
  ) %>%
  head(10)

p_by_SelfTaughtTypes <- highchart() %>%
  hc_xAxis(categories = by_SelfTaughtTypes[["SelfTaughtTypes"]]) %>%
  hc_add_series(
    name = "Percent %",
    data = by_SelfTaughtTypes[["Percent"]],
    colorByPoint = 1
  ) %>%
  hc_title(text = "Self Taught Types Education of Respondent") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())

# Show the non-degree and self-taught charts together.
lst <- list(p_by_EducationTypes, p_by_SelfTaughtTypes)
hw_grid(lst, rowheight = 400)
# Drilldown column chart: non-degree education type (top level) broken down
# by TimeAfterBootcamp (second level).

# One row per respondent x selected education type.
survey_results_public2 <- survey_results_public %>%
  mutate(EducationTypes = strsplit(as.character(EducationTypes), ";")) %>%
  unnest(EducationTypes)

# Top level: one point per education type; `drilldown` is the id of the
# matching second-level series built below.
df1 <- survey_results_public2 %>%
  filter(!is.na(EducationTypes)) %>%
  group_by(name = EducationTypes, drilldown = tolower(EducationTypes)) %>%
  summarise(y = n()) %>%
  arrange(desc(y))

# Second level: one series per education type (id = lower-cased type), whose
# `data` is the list of TimeAfterBootcamp counts within that type.
df2 <- survey_results_public2 %>%
  filter(!is.na(EducationTypes), !is.na(TimeAfterBootcamp)) %>%
  group_by(EducationTypes, TimeAfterBootcamp) %>%
  dplyr::mutate(y = n(), colorByPoint = 1) %>%
  arrange(desc(y)) %>%
  group_by(
    name = EducationTypes,
    id = tolower(EducationTypes),
    colorByPoint
  ) %>%
  do(data = list_parse(
    mutate(
      .,
      name = TimeAfterBootcamp,
      drilldown = tolower(paste(EducationTypes, TimeAfterBootcamp, sep = ": "))
    ) %>%
      group_by(name, drilldown) %>%
      summarise(y = n()) %>%
      dplyr::select(name, y, drilldown) %>%
      arrange(desc(y))
  ))

highchart() %>%
  hc_chart(type = "column") %>%
  hc_title(text = "Types of Non-Degree Education Vs Time to get a Full-time Job as a Developer ") %>%
  hc_add_series(
    data = df1,
    name = "Types of Non-Degree Education",
    colorByPoint = 1
  ) %>%
  hc_legend(enabled = FALSE) %>%
  hc_xAxis(type = "category") %>%
  hc_drilldown(
    allowPointDrilldown = TRUE,
    series = list_parse(df2)
  ) %>%
  hc_add_theme(hc_theme_google())
# Co-occurrence network of non-degree education types: nodes are education
# types, edges connect two types selected by the same respondent.

# One row per respondent x selected education type.
df <- survey_results_public %>% select(Respondent, EducationTypes)
df2 <- df %>%
  mutate(EducationTypes = strsplit(as.character(EducationTypes), ";")) %>%
  unnest(EducationTypes)

# Edges: every pair of types chosen by a respondent with >= 2 selections.
df2_edges <- df2 %>%
  group_by(Respondent) %>%
  filter(n() >= 2) %>%
  do(data.frame(t(combn((.)[["EducationTypes"]], 2)), stringsAsFactors = FALSE)) %>%
  ungroup() %>%
  rename(source = X1, target = X2) %>%
  select(-Respondent)

# Count respondents per pair. Ungroup before renaming the columns so the
# positional names<- below never touches a still-grouped tibble.
df2_edges <- df2_edges %>%
  group_by(source, target) %>%
  summarise(weight = n()) %>%
  ungroup()
names(df2_edges) <- c("from", "to", "weight")
df2_edges$weight <- df2_edges$weight / 1500 # scaled-down pair count
df2_edges$width <- 1 + df2_edges$weight # line width
df2_edges$color <- "gray" # line color
#df2_edges$arrows <- "middle" # arrows: 'from', 'to', or 'middle'
df2_edges$smooth <- FALSE # should the edges be curved?
df2_edges$shadow <- FALSE # edge shadow

# Nodes: one per education type, sized by its (scaled) selection count.
# The original script built this table twice with identical code; it is
# built once here.
df2_nodes <- df2 %>%
  filter(!is.na(EducationTypes)) %>%
  group_by(EducationTypes) %>%
  summarise(n = n() / 1000) %>%
  arrange(desc(n))
names(df2_nodes) <- c("id", "size")
n <- nrow(df2_nodes)
palette <- distinctColorPalette(n)
df2_nodes$shape <- "dot"
df2_nodes$shadow <- TRUE # Nodes will drop shadow
df2_nodes$title <- df2_nodes$id # Text on click
df2_nodes$label <- df2_nodes$id # Node label
df2_nodes$borderWidth <- 2 # Node border width
df2_nodes$color.background <- palette[as.numeric(as.factor(df2_nodes$id))]
df2_nodes$color.border <- "black"
df2_nodes$color.highlight.background <- "orange"
df2_nodes$color.highlight.border <- "darkred"

visNetwork(df2_nodes, df2_edges, height = "500px", width = "100%") %>%
  visIgraphLayout(layout = "layout_with_lgl") %>%
  visEdges(
    shadow = TRUE,
    color = list(color = "gray", highlight = "orange")
  )
# Top 10 reasons for joining hackathons. HackathonReasons is a ";"-separated
# multi-select; Percent is relative to the total number of survey rows.
by_HackathonReasons <- survey_results_public %>%
  select(Respondent, HackathonReasons) %>%
  mutate(HackathonReasons = strsplit(as.character(HackathonReasons), ";")) %>%
  unnest(HackathonReasons) %>%
  filter(!is.na(HackathonReasons)) %>%
  group_by(HackathonReasons) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total)) %>%
  ungroup() %>%
  mutate(
    HackathonReasons = reorder(HackathonReasons, Total),
    Percent = round(Total / nrow(survey_results_public) * 100)
  ) %>%
  head(10)

highchart() %>%
  hc_xAxis(categories = by_HackathonReasons[["HackathonReasons"]]) %>%
  hc_add_series(
    name = "Percent %",
    data = by_HackathonReasons[["Percent"]],
    colorByPoint = 1
  ) %>%
  hc_title(text = "Reasons For Participating in Hackathon") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())
# Top 10 developer types. DevType is a ";"-separated multi-select; Percent is
# relative to the total number of survey rows.
by_DevType <- survey_results_public %>%
  select(Respondent, DevType) %>%
  mutate(DevType = strsplit(as.character(DevType), ";")) %>%
  unnest(DevType) %>%
  filter(!is.na(DevType)) %>%
  group_by(DevType) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total)) %>%
  ungroup() %>%
  mutate(
    DevType = reorder(DevType, Total),
    Percent = round(Total / nrow(survey_results_public) * 100)
  ) %>%
  head(10)

highchart() %>%
  hc_xAxis(categories = by_DevType[["DevType"]]) %>%
  hc_add_series(
    name = "Percent %",
    data = by_DevType[["Percent"]],
    colorByPoint = 1
  ) %>%
  hc_title(text = "Developer Type") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())
# Co-occurrence network of developer types: nodes are developer types, edges
# connect two types selected by the same respondent.

# One row per respondent x selected developer type.
df <- survey_results_public %>%
  select(Respondent, DevType)
df2 <- df %>%
  mutate(DevType = strsplit(as.character(DevType), ";")) %>%
  unnest(DevType)

# Edges: every pair of types chosen by a respondent with >= 2 selections.
df2_edges <- df2 %>%
  group_by(Respondent) %>%
  filter(n() >= 2) %>%
  do(data.frame(t(combn((.)[["DevType"]], 2)), stringsAsFactors = FALSE)) %>%
  ungroup() %>%
  rename(source = X1, target = X2) %>%
  select(-Respondent)

# Number of respondents per pair, then visNetwork's edge column names.
df2_edges <- df2_edges %>%
  group_by(source, target) %>%
  summarise(weight = n())
names(df2_edges) <- c("from", "to", "weight")

# Edge styling.
df2_edges$weight <- df2_edges$weight / 1500
df2_edges$width <- 1 + df2_edges$weight # line width
df2_edges$color <- "gray" # line color
#df2_edges$arrows <- "middle" # arrows: 'from', 'to', or 'middle'
df2_edges$smooth <- FALSE # should the edges be curved?
df2_edges$shadow <- FALSE # edge shadow

# Nodes: one per developer type, sized by its (scaled) selection count.
df2_nodes <- df2 %>%
  filter(!is.na(DevType)) %>%
  group_by(DevType) %>%
  summarise(n = n() / 700) %>%
  arrange(desc(n))
names(df2_nodes) <- c("id", "size")

# One distinct background colour per node.
n <- nrow(df2_nodes)
palette <- distinctColorPalette(n)

# Node styling.
df2_nodes$shape <- "dot"
df2_nodes$shadow <- TRUE # Nodes will drop shadow
df2_nodes$title <- df2_nodes$id # Text on click
df2_nodes$label <- df2_nodes$id # Node label
df2_nodes$size <- df2_nodes$size # Node size
df2_nodes$borderWidth <- 2 # Node border width
df2_nodes$color.background <- palette[as.numeric(as.factor(df2_nodes$id))]
df2_nodes$color.border <- "black"
df2_nodes$color.highlight.background <- "orange"
df2_nodes$color.highlight.border <- "darkred"

visNetwork(df2_nodes, df2_edges, height = "500px", width = "100%") %>%
  visIgraphLayout(layout = "layout_with_lgl") %>%
  visEdges(
    shadow = TRUE,
    color = list(color = "gray", highlight = "orange")
  )
# Drilldown column chart: OpenSource answer (top level) broken down by
# developer type (second level).

# One row per respondent x selected developer type.
survey_results_public2 <- survey_results_public %>%
  mutate(DevType = strsplit(as.character(DevType), ";")) %>%
  unnest(DevType)

# Top level: one point per OpenSource answer; `drilldown` is the id of the
# matching second-level series built below.
df1 <- survey_results_public2 %>%
  filter(!is.na(OpenSource)) %>%
  group_by(name = OpenSource, drilldown = tolower(OpenSource)) %>%
  summarise(y = n()) %>%
  arrange(desc(y))

# Second level: one series per OpenSource answer whose `data` is the list of
# developer-type counts within that answer.
df2 <- survey_results_public2 %>%
  filter(!is.na(OpenSource), !is.na(DevType)) %>%
  group_by(OpenSource, DevType) %>%
  dplyr::mutate(y = n(), colorByPoint = 1) %>%
  arrange(desc(y)) %>%
  group_by(
    name = OpenSource,
    id = tolower(OpenSource),
    colorByPoint
  ) %>%
  do(data = list_parse(
    mutate(
      .,
      name = DevType,
      drilldown = tolower(paste(OpenSource, DevType, sep = ": "))
    ) %>%
      group_by(name, drilldown) %>%
      summarise(y = n()) %>%
      dplyr::select(name, y, drilldown) %>%
      arrange(desc(y))
  ))

highchart() %>%
  hc_chart(type = "column") %>%
  hc_title(text = "Who Contributed to opensource") %>%
  hc_add_series(
    data = df1,
    name = "Is Contributed to opensource",
    colorByPoint = 1
  ) %>%
  hc_legend(enabled = FALSE) %>%
  hc_xAxis(type = "category") %>%
  hc_drilldown(
    allowPointDrilldown = TRUE,
    series = list_parse(df2)
  ) %>%
  hc_add_theme(hc_theme_google())
# Drilldown column chart: Hobby answer (top level) broken down by developer
# type (second level).

# One row per respondent x selected developer type.
survey_results_public2 <- survey_results_public %>%
  mutate(DevType = strsplit(as.character(DevType), ";")) %>%
  unnest(DevType)

# Top level: one point per Hobby answer; `drilldown` is the id of the
# matching second-level series built below.
df1 <- survey_results_public2 %>%
  filter(!is.na(Hobby)) %>%
  group_by(name = Hobby, drilldown = tolower(Hobby)) %>%
  summarise(y = n()) %>%
  arrange(desc(y))

# Second level: one series per Hobby answer whose `data` is the list of
# developer-type counts within that answer.
df2 <- survey_results_public2 %>%
  filter(!is.na(Hobby), !is.na(DevType)) %>%
  group_by(Hobby, DevType) %>%
  dplyr::mutate(y = n(), colorByPoint = 1) %>%
  arrange(desc(y)) %>%
  group_by(
    name = Hobby,
    id = tolower(Hobby),
    colorByPoint
  ) %>%
  do(data = list_parse(
    mutate(
      .,
      name = DevType,
      drilldown = tolower(paste(Hobby, DevType, sep = ": "))
    ) %>%
      group_by(name, drilldown) %>%
      summarise(y = n()) %>%
      dplyr::select(name, y, drilldown) %>%
      arrange(desc(y))
  ))

highchart() %>%
  hc_chart(type = "column") %>%
  hc_title(text = "Who have code as Hobby") %>%
  hc_add_series(
    data = df1,
    name = "Is Coding Hobby",
    colorByPoint = 1
  ) %>%
  hc_legend(enabled = FALSE) %>%
  hc_xAxis(type = "category") %>%
  hc_drilldown(
    allowPointDrilldown = TRUE,
    series = list_parse(df2)
  ) %>%
  hc_add_theme(hc_theme_google())
# Drilldown column chart: years of coding (top level, 10 largest buckets)
# broken down by years of professional coding (second level).

# Top level: one point per YearsCoding bucket; `drilldown` is the id of the
# matching second-level series built below.
df1 <- survey_results_public %>%
  filter(!is.na(YearsCoding)) %>%
  group_by(name = YearsCoding, drilldown = tolower(YearsCoding)) %>%
  summarise(y = n()) %>%
  arrange(desc(y)) %>%
  head(10)

# Second level: one series per YearsCoding bucket whose `data` is the list of
# YearsCodingProf counts within that bucket.
df2 <- survey_results_public %>%
  filter(!is.na(YearsCoding), !is.na(YearsCodingProf)) %>%
  group_by(YearsCoding, YearsCodingProf) %>%
  dplyr::mutate(y = n(), colorByPoint = 1) %>%
  arrange(desc(y)) %>%
  group_by(
    name = YearsCoding,
    id = tolower(YearsCoding),
    colorByPoint
  ) %>%
  do(data = list_parse(
    mutate(
      .,
      name = YearsCodingProf,
      drilldown = tolower(paste(YearsCoding, YearsCodingProf, sep = ": "))
    ) %>%
      group_by(name, drilldown) %>%
      summarise(y = n()) %>%
      dplyr::select(name, y, drilldown) %>%
      arrange(desc(y))
  ))

highchart() %>%
  hc_chart(type = "column") %>%
  hc_title(text = "No Of Years Coding Vs No Of Years Coded Professionally") %>%
  hc_add_series(
    data = df1,
    name = "No Of Years Coding",
    colorByPoint = 1
  ) %>%
  hc_legend(enabled = FALSE) %>%
  hc_xAxis(type = "category") %>%
  hc_yAxis(title = list(text = "Total Response")) %>%
  hc_drilldown(
    allowPointDrilldown = TRUE,
    series = list_parse(df2)
  ) %>%
  hc_add_theme(hc_theme_google())
3-5 years: 23313. Professional Coding Years: 1. 0-2 years: 10104 2. 3-5 years: 8126
6-8 years: 19338. Professional Coding Years: 1. 3-5 years: 7678 2. 0-2 years: 4753 3. 6-8 years: 4064
9-11 years: 12169. Professional Coding Years: 1. 6-8 years: 3616 2. 3-5 years: 3356 3. 9-11 years: 2408 4. 0-2 years: 1215
# Gender distribution. Gender is a ";"-separated multi-select, so it is
# split into one row per selected value; percentage is relative to the
# number of non-missing selections.
by_Gender <- survey_results_public %>%
  mutate(Gender = strsplit(as.character(Gender), ";")) %>%
  unnest(Gender) %>%
  filter(!is.na(Gender)) %>%
  group_by(Gender) %>%
  summarise(n = n()) %>%
  mutate(percentage = round((n / sum(n)) * 100))

# Pie chart of the rounded percentages.
highchart() %>%
  hc_chart(type = "pie") %>%
  hc_title(text = "Gender Distribution") %>%
  hc_add_series_labels_values(
    labels = by_Gender[["Gender"]],
    values = by_Gender[["percentage"]]
  ) %>%
  hc_add_theme(hc_theme_google())
# Respondent counts per (years-of-coding bucket, gender).
# YearsCodingNum is the number parse_number() extracts from the YearsCoding
# label; any gender value containing "Non-binary" is collapsed to
# "Non-binary", and only Male / Female / Non-binary rows are kept.
by_yearOfExp_gender <- survey_results_public %>%
  select(YearsCoding, Gender) %>%
  filter(!is.na(YearsCoding)) %>%
  mutate(
    YearsCodingNum = parse_number(YearsCoding),
    Gender = str_split(Gender, pattern = ";")
  ) %>%
  unnest(Gender) %>%
  mutate(
    Gender = case_when(
      str_detect(Gender, "Non-binary") ~ "Non-binary",
      TRUE ~ Gender
    )
  ) %>%
  group_by(YearsCodingNum, Gender) %>%
  summarise(n = n()) %>%
  filter(Gender %in% c("Male", "Female", "Non-binary"))

# One line per gender across the experience buckets.
hchart(
  by_yearOfExp_gender,
  "line",
  hcaes(x = YearsCodingNum, y = n, group = Gender)
) %>%
  hc_title(text = "Gender Vs Years of Experience") %>%
  hc_xAxis(title = list(text = "Years of Experience")) %>%
  hc_yAxis(title = list(text = "No Of Male/Female")) %>%
  hc_add_theme(hc_theme_google())
# Gender share (percent of all counted rows) per years-of-coding bucket:
# columns show the combined share, lines show each gender.

# Percent is relative to the grand total over every bucket and gender.
by_yearOfExp_gender$Percent <- (by_yearOfExp_gender$n / sum(by_yearOfExp_gender$n) * 100)

# Wide table: one row per bucket, one Percent column per gender.
# value.var is given explicitly instead of letting dcast() guess it from the
# last column, which would silently change if a column were added later.
aqw <- dcast(by_yearOfExp_gender, YearsCodingNum ~ Gender, value.var = "Percent")

# Combined share per bucket. A gender absent from a bucket is NA in the wide
# table; na.rm = TRUE keeps the total defined instead of turning it into NA.
aqw$sum <- rowSums(
  cbind(aqw$Female, aqw$Male, aqw[["Non-binary"]]),
  na.rm = TRUE
)

highchart() %>%
  hc_title(text = "Gender % Vs Years of Experience") %>%
  hc_chart(type = "column") %>%
  hc_xAxis(
    categories = aqw$YearsCodingNum,
    title = list(text = "Years of Experience")
  ) %>%
  # Named so the legend does not fall back to a generic series label.
  hc_add_series(name = "Total", data = aqw$sum) %>%
  hc_add_series(name = "Male", type = "line", data = aqw$Male) %>%
  hc_add_series(name = "Female", type = "line", data = aqw$Female) %>%
  hc_add_series(name = "Non-binary", type = "line", data = aqw[["Non-binary"]]) %>%
  hc_yAxis(title = list(text = "% Of Male/Female/Non-binary")) %>%
  hc_add_theme(hc_theme_google())
# Answer counts for the three AgreeDisagree questions, drawn as three lines
# over the answer levels of the first question.
# NOTE(review): y and z are plotted against x's categories, which assumes all
# three columns yield the same levels in the same order -- confirm.
x <- data.frame(table(survey_results_public$AgreeDisagree1))
y <- data.frame(table(survey_results_public$AgreeDisagree2))
z <- data.frame(table(survey_results_public$AgreeDisagree3))

highchart() %>%
  hc_title(text = "Kinship vs Competition vs Self-Evaluation") %>%
  hc_chart(type = "column") %>%
  hc_xAxis(
    categories = x$Var1,
    title = list(text = "Agreement Level Scale")
  ) %>%
  hc_add_series(
    name = "I feel a sense of kinship or connection to other developers",
    type = "line",
    data = x$Freq
  ) %>%
  hc_add_series(
    name = "I think of myself as competing with my peers",
    type = "line",
    data = y$Freq
  ) %>%
  hc_add_series(
    name = "I'm not as good at programming as most of my peers",
    type = "line",
    data = z$Freq
  ) %>%
  hc_yAxis(title = list(text = "Total")) %>%
  hc_add_theme(hc_theme_google())
# AgreeDisagree1 answers by years of coding.
# YearsCodingNum is the number parse_number() extracts from YearsCoding.
by_agreeDisagree1_yearOfExp <- survey_results_public %>%
  filter(!is.na(AgreeDisagree1)) %>%
  mutate(YearsCodingNum = parse_number(YearsCoding)) %>%
  group_by(AgreeDisagree1, YearsCodingNum) %>%
  summarise(n = n()) %>%
  select(AgreeDisagree1, YearsCodingNum, n)

# Percent of the grand total: assigned outside the pipeline so sum() runs
# over the whole n column.
by_agreeDisagree1_yearOfExp$Percent <-
  (by_agreeDisagree1_yearOfExp$n / sum(by_agreeDisagree1_yearOfExp$n) * 100)

# One line per answer level.
p_by_agreeDisagree1_yearOfExp <- hchart(
  by_agreeDisagree1_yearOfExp,
  "line",
  hcaes(x = YearsCodingNum, y = Percent, group = AgreeDisagree1)
) %>%
  hc_title(text = "Kinship by Years of Experience") %>%
  hc_xAxis(title = list(text = "Years of Experience")) %>%
  hc_yAxis(title = list(text = "Percentage of Agreement Level")) %>%
  hc_add_theme(hc_theme_google())
# AgreeDisagree2 answers by years of coding.
# YearsCodingNum is the number parse_number() extracts from YearsCoding.
by_agreeDisagree2_yearOfExp <- survey_results_public %>%
  filter(!is.na(AgreeDisagree2)) %>%
  mutate(YearsCodingNum = parse_number(YearsCoding)) %>%
  group_by(AgreeDisagree2, YearsCodingNum) %>%
  summarise(n = n()) %>%
  select(AgreeDisagree2, YearsCodingNum, n)

# Percent of the grand total: assigned outside the pipeline so sum() runs
# over the whole n column.
by_agreeDisagree2_yearOfExp$Percent <-
  (by_agreeDisagree2_yearOfExp$n / sum(by_agreeDisagree2_yearOfExp$n) * 100)

# One line per answer level.
p_by_agreeDisagree2_yearOfExp <- hchart(
  by_agreeDisagree2_yearOfExp,
  "line",
  hcaes(x = YearsCodingNum, y = Percent, group = AgreeDisagree2)
) %>%
  hc_title(text = "Competition by Years of Experience") %>%
  hc_xAxis(title = list(text = "Years of Experience")) %>%
  hc_yAxis(title = list(text = "Percentage of Agreement Level")) %>%
  hc_add_theme(hc_theme_google())
# AgreeDisagree3 answers by years of coding.
# YearsCodingNum is the number parse_number() extracts from YearsCoding.
by_agreeDisagree3_yearOfExp <- survey_results_public %>%
  filter(!is.na(AgreeDisagree3)) %>%
  mutate(YearsCodingNum = parse_number(YearsCoding)) %>%
  group_by(AgreeDisagree3, YearsCodingNum) %>%
  summarise(n = n()) %>%
  select(AgreeDisagree3, YearsCodingNum, n)

# Percent of the grand total: assigned outside the pipeline so sum() runs
# over the whole n column.
by_agreeDisagree3_yearOfExp$Percent <-
  (by_agreeDisagree3_yearOfExp$n / sum(by_agreeDisagree3_yearOfExp$n) * 100)

# One line per answer level.
p_by_agreeDisagree3_yearOfExp <- hchart(
  by_agreeDisagree3_yearOfExp,
  "line",
  hcaes(x = YearsCodingNum, y = Percent, group = AgreeDisagree3)
) %>%
  hc_title(text = "Self-Evaluation by Years of Experience") %>%
  hc_xAxis(title = list(text = "Years of Experience")) %>%
  hc_yAxis(title = list(text = "Percentage of Agreement Level")) %>%
  hc_add_theme(hc_theme_google())

# Show the three agreement-by-experience charts together.
lst <- list(
  p_by_agreeDisagree1_yearOfExp,
  p_by_agreeDisagree2_yearOfExp,
  p_by_agreeDisagree3_yearOfExp
)
hw_grid(lst, rowheight = 400)
This section deals with the analysis of the primary purpose of Stack Overflow - the technology and discussions. During the survey, the respondents were asked various questions regarding the technologies that they work on. This section delivers all the related insights gathered during the EDA which includes Programming Languages, Databases, Software Dev Platforms, Frameworks and IDEs.
# Top 10 languages respondents have worked with. LanguageWorkedWith is a
# ";"-separated multi-select; Percent is relative to the total number of
# survey rows.
by_LanguageWorkedWith <- survey_results_public %>%
  select(Respondent, LanguageWorkedWith) %>%
  mutate(LanguageWorkedWith = strsplit(as.character(LanguageWorkedWith), ";")) %>%
  unnest(LanguageWorkedWith) %>%
  filter(!is.na(LanguageWorkedWith)) %>%
  group_by(LanguageWorkedWith) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total)) %>%
  ungroup() %>%
  mutate(
    LanguageWorkedWith = reorder(LanguageWorkedWith, Total),
    Percent = round(Total / nrow(survey_results_public) * 100)
  ) %>%
  head(10)

p_by_LanguageWorkedWith <- highchart() %>%
  hc_xAxis(categories = by_LanguageWorkedWith[["LanguageWorkedWith"]]) %>%
  hc_add_series(
    name = "Percent %",
    data = by_LanguageWorkedWith[["Percent"]],
    colorByPoint = 1
  ) %>%
  hc_title(text = "Most Popular Programming Language") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())
# Bottom 10 languages respondents have worked with. Same aggregation as the
# top-10 table but keeps the last 10 rows and leaves Percent unrounded.
# Reassigns by_LanguageWorkedWith.
by_LanguageWorkedWith <- survey_results_public %>%
  select(Respondent, LanguageWorkedWith) %>%
  mutate(LanguageWorkedWith = strsplit(as.character(LanguageWorkedWith), ";")) %>%
  unnest(LanguageWorkedWith) %>%
  filter(!is.na(LanguageWorkedWith)) %>%
  group_by(LanguageWorkedWith) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total)) %>%
  ungroup() %>%
  mutate(
    LanguageWorkedWith = reorder(LanguageWorkedWith, Total),
    Percent = (Total / nrow(survey_results_public) * 100)
  ) %>%
  tail(10)

p2_by_LanguageWorkedWith <- highchart() %>%
  hc_xAxis(categories = by_LanguageWorkedWith[["LanguageWorkedWith"]]) %>%
  hc_add_series(
    name = "Percent %",
    data = by_LanguageWorkedWith[["Percent"]],
    colorByPoint = 1
  ) %>%
  hc_title(text = "Less Popular Programming Language") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())

# Show the most- and less-popular language charts together.
lst <- list(p_by_LanguageWorkedWith, p2_by_LanguageWorkedWith)
hw_grid(lst, rowheight = 400)
# Top 10 languages respondents want to work with next year.
# LanguageDesireNextYear is a ";"-separated multi-select; Percent is relative
# to the total number of survey rows.
by_LanguageDesireNextYear <- survey_results_public %>%
  select(Respondent, LanguageDesireNextYear) %>%
  mutate(LanguageDesireNextYear = strsplit(as.character(LanguageDesireNextYear), ";")) %>%
  unnest(LanguageDesireNextYear) %>%
  filter(!is.na(LanguageDesireNextYear)) %>%
  group_by(LanguageDesireNextYear) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total)) %>%
  ungroup() %>%
  mutate(
    LanguageDesireNextYear = reorder(LanguageDesireNextYear, Total),
    Percent = round(Total / nrow(survey_results_public) * 100)
  ) %>%
  head(10)

highchart() %>%
  hc_xAxis(categories = by_LanguageDesireNextYear[["LanguageDesireNextYear"]]) %>%
  hc_add_series(
    name = "Percent %",
    data = by_LanguageDesireNextYear[["Percent"]],
    colorByPoint = 1
  ) %>%
  hc_title(text = "Most Popular Language Desire for Next Year") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())
# Languages respondents both work with now and want to keep using next year.

# One row per respondent x (worked-with language, desired language) pair.
LanguageWorkedWith_LanguageDesireNextYear_df <- survey_results_public %>%
  select(Respondent, LanguageWorkedWith, LanguageDesireNextYear) %>%
  mutate(LanguageWorkedWith = strsplit(as.character(LanguageWorkedWith), ";")) %>%
  unnest(LanguageWorkedWith) %>%
  filter(!is.na(LanguageWorkedWith)) %>%
  mutate(LanguageDesireNextYear = strsplit(as.character(LanguageDesireNextYear), ";")) %>%
  unnest(LanguageDesireNextYear) %>%
  filter(!is.na(LanguageDesireNextYear))

# Pair counts, largest first, as a plain data frame with from/to/weight.
df2_edges <- LanguageWorkedWith_LanguageDesireNextYear_df %>%
  group_by(LanguageWorkedWith, LanguageDesireNextYear) %>%
  summarise(n = n()) %>%
  arrange(desc(n))
df2_edges <- as.data.frame(df2_edges)
names(df2_edges) <- c("from", "to", "weight")

# Keep the 10 largest self-pairs (same language now and next year).
# filter() replaces the deprecated standard-evaluation filter_().
x <- df2_edges %>%
  filter(from == to) %>%
  head(10)
x$percent <- round(x$weight / nrow(survey_results_public) * 100)

p_LanguageWorkedWith_LanguageDesireNextYear_df <- highchart() %>%
  hc_xAxis(categories = x$from) %>%
  hc_add_series(name = "Percent %", data = x$percent, colorByPoint = 1) %>%
  hc_title(text = "Same Programming Language Desire for Next Year") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())

# Compare with the overall most-popular-language chart.
lst <- list(
  p_LanguageWorkedWith_LanguageDesireNextYear_df,
  p_by_LanguageWorkedWith
)
hw_grid(lst, rowheight = 400)
# Top 10 databases respondents have worked with. DatabaseWorkedWith is a
# ";"-separated multi-select; Percent is relative to the total number of
# survey rows.
by_DatabaseWorkedWith <- survey_results_public %>%
  select(Respondent, DatabaseWorkedWith) %>%
  mutate(DatabaseWorkedWith = strsplit(as.character(DatabaseWorkedWith), ";")) %>%
  unnest(DatabaseWorkedWith) %>%
  filter(!is.na(DatabaseWorkedWith)) %>%
  group_by(DatabaseWorkedWith) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total)) %>%
  ungroup() %>%
  mutate(
    DatabaseWorkedWith = reorder(DatabaseWorkedWith, Total),
    Percent = round(Total / nrow(survey_results_public) * 100)
  ) %>%
  head(10)

p_by_DatabaseWorkedWith <- highchart() %>%
  hc_xAxis(categories = by_DatabaseWorkedWith[["DatabaseWorkedWith"]]) %>%
  hc_add_series(
    name = "Percent %",
    data = by_DatabaseWorkedWith[["Percent"]],
    colorByPoint = 1
  ) %>%
  hc_title(text = "Most Popular Database") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())
# Bottom 10 databases respondents have worked with. Same aggregation as the
# top-10 table but keeps the last 10 rows and leaves Percent unrounded.
# Reassigns by_DatabaseWorkedWith.
by_DatabaseWorkedWith <- survey_results_public %>%
  select(Respondent, DatabaseWorkedWith) %>%
  mutate(DatabaseWorkedWith = strsplit(as.character(DatabaseWorkedWith), ";")) %>%
  unnest(DatabaseWorkedWith) %>%
  filter(!is.na(DatabaseWorkedWith)) %>%
  group_by(DatabaseWorkedWith) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total)) %>%
  ungroup() %>%
  mutate(
    DatabaseWorkedWith = reorder(DatabaseWorkedWith, Total),
    Percent = (Total / nrow(survey_results_public) * 100)
  ) %>%
  tail(10)

p2_by_DatabaseWorkedWith <- highchart() %>%
  hc_xAxis(categories = by_DatabaseWorkedWith[["DatabaseWorkedWith"]]) %>%
  hc_add_series(
    name = "Percent %",
    data = by_DatabaseWorkedWith[["Percent"]],
    colorByPoint = 1
  ) %>%
  hc_title(text = "Less Popular Database") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())

# Show the most- and less-popular database charts together.
lst <- list(p_by_DatabaseWorkedWith, p2_by_DatabaseWorkedWith)
hw_grid(lst, rowheight = 400)
# Top 10 databases respondents want to work with next year.
# DatabaseDesireNextYear is a ";"-separated multi-select; Percent is relative
# to the total number of survey rows.
by_DatabaseDesireNextYear <- survey_results_public %>%
  select(Respondent, DatabaseDesireNextYear) %>%
  mutate(DatabaseDesireNextYear = strsplit(as.character(DatabaseDesireNextYear), ";")) %>%
  unnest(DatabaseDesireNextYear) %>%
  filter(!is.na(DatabaseDesireNextYear)) %>%
  group_by(DatabaseDesireNextYear) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total)) %>%
  ungroup() %>%
  mutate(DatabaseDesireNextYear = reorder(DatabaseDesireNextYear, Total)) %>%
  mutate(Percent = round(Total / nrow(survey_results_public) * 100)) %>%
  head(10)

highchart() %>%
  hc_xAxis(categories = by_DatabaseDesireNextYear$DatabaseDesireNextYear) %>%
  hc_add_series(
    name = "Percent %",
    data = by_DatabaseDesireNextYear$Percent,
    colorByPoint = 1
  ) %>%
  # Title previously repeated "Next Year"; now matches the wording of the
  # language chart ("Most Popular Language Desire for Next Year").
  hc_title(text = "Most Popular Database Desire for Next Year") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())
# Databases respondents both work with now and want to keep using next year.

# One row per respondent x (worked-with database, desired database) pair.
DatabaseWorkedWith_DatabaseDesireNextYear_df <- survey_results_public %>%
  select(Respondent, DatabaseWorkedWith, DatabaseDesireNextYear) %>%
  mutate(DatabaseWorkedWith = strsplit(as.character(DatabaseWorkedWith), ";")) %>%
  unnest(DatabaseWorkedWith) %>%
  filter(!is.na(DatabaseWorkedWith)) %>%
  mutate(DatabaseDesireNextYear = strsplit(as.character(DatabaseDesireNextYear), ";")) %>%
  unnest(DatabaseDesireNextYear) %>%
  filter(!is.na(DatabaseDesireNextYear))

# Pair counts, largest first, as a plain data frame with from/to/weight.
df2_edges <- DatabaseWorkedWith_DatabaseDesireNextYear_df %>%
  group_by(DatabaseWorkedWith, DatabaseDesireNextYear) %>%
  summarise(n = n()) %>%
  arrange(desc(n))
df2_edges <- as.data.frame(df2_edges)
names(df2_edges) <- c("from", "to", "weight")

# Keep the 10 largest self-pairs (same database now and next year).
# filter() replaces the deprecated standard-evaluation filter_().
x <- df2_edges %>%
  filter(from == to) %>%
  head(10)
x$percent <- round(x$weight / nrow(survey_results_public) * 100)

p_DatabaseWorkedWith_DatabaseDesireNextYear_df <- highchart() %>%
  hc_xAxis(categories = x$from) %>%
  hc_add_series(name = "Percent %", data = x$percent, colorByPoint = 1) %>%
  hc_title(text = "Same Database Desire for Next Year") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())

# Compare with the overall most-popular-database chart.
lst <- list(
  p_DatabaseWorkedWith_DatabaseDesireNextYear_df,
  p_by_DatabaseWorkedWith
)
hw_grid(lst, rowheight = 400)
# Top 10 platforms respondents have worked with. PlatformWorkedWith is a
# ";"-separated multi-select; Percent is relative to the total number of
# survey rows.
by_PlatformWorkedWith <- survey_results_public %>%
  select(Respondent, PlatformWorkedWith) %>%
  mutate(PlatformWorkedWith = strsplit(as.character(PlatformWorkedWith), ";")) %>%
  unnest(PlatformWorkedWith) %>%
  filter(!is.na(PlatformWorkedWith)) %>%
  group_by(PlatformWorkedWith) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total)) %>%
  ungroup() %>%
  mutate(
    PlatformWorkedWith = reorder(PlatformWorkedWith, Total),
    Percent = round(Total / nrow(survey_results_public) * 100)
  ) %>%
  head(10)

p_by_PlatformWorkedWith <- highchart() %>%
  hc_xAxis(categories = by_PlatformWorkedWith[["PlatformWorkedWith"]]) %>%
  hc_add_series(
    name = "Percent %",
    data = by_PlatformWorkedWith[["Percent"]],
    colorByPoint = 1
  ) %>%
  hc_title(text = "Most Popular Platform") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())
# Bottom 10 platforms respondents have worked with. Same aggregation as the
# top-10 table but keeps the last 10 rows and leaves Percent unrounded.
# Reassigns by_PlatformWorkedWith.
by_PlatformWorkedWith <- survey_results_public %>%
  select(Respondent, PlatformWorkedWith) %>%
  mutate(PlatformWorkedWith = strsplit(as.character(PlatformWorkedWith), ";")) %>%
  unnest(PlatformWorkedWith) %>%
  filter(!is.na(PlatformWorkedWith)) %>%
  group_by(PlatformWorkedWith) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total)) %>%
  ungroup() %>%
  mutate(
    PlatformWorkedWith = reorder(PlatformWorkedWith, Total),
    Percent = (Total / nrow(survey_results_public) * 100)
  ) %>%
  tail(10)

p2_by_PlatformWorkedWith <- highchart() %>%
  hc_xAxis(categories = by_PlatformWorkedWith[["PlatformWorkedWith"]]) %>%
  hc_add_series(
    name = "Percent %",
    data = by_PlatformWorkedWith[["Percent"]],
    colorByPoint = 1
  ) %>%
  hc_title(text = "Least Popular Platform") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())

# Show the most- and least-popular platform charts together.
lst <- list(p_by_PlatformWorkedWith, p2_by_PlatformWorkedWith)
hw_grid(lst, rowheight = 400)
# Ten platforms respondents most often say they want to work with next year.
# The ";"-separated answers are split into one row per platform, counted,
# and expressed as a rounded percentage of all survey rows.
by_PlatformDesireNextYear <- survey_results_public %>%
  select(Respondent, PlatformDesireNextYear) %>%
  mutate(
    PlatformDesireNextYear = strsplit(as.character(PlatformDesireNextYear), ";")
  ) %>%
  unnest(PlatformDesireNextYear) %>%
  filter(!is.na(PlatformDesireNextYear)) %>%
  group_by(PlatformDesireNextYear) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total)) %>%
  ungroup() %>%
  mutate(
    PlatformDesireNextYear = reorder(PlatformDesireNextYear, Total),
    Percent = round(Total / nrow(survey_results_public) * 100)
  ) %>%
  head(10)

# Horizontal bar chart of those ten platforms (printed, not stored).
highchart() %>%
  hc_xAxis(categories = by_PlatformDesireNextYear$PlatformDesireNextYear) %>%
  hc_add_series(
    name = "Percent %",
    data = by_PlatformDesireNextYear$Percent,
    colorByPoint = 1
  ) %>%
  hc_title(text = "Most Popular Platform Next Year Desire for Next Year") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())
# Share of respondents who already use a platform AND want to keep using it
# next year, shown next to the overall most-popular-platform chart.

# Expand both ";"-separated multi-select columns so that every row is one
# (respondent, platform used, platform desired) combination.
PlatformWorkedWith_PlatformDesireNextYear_df <- survey_results_public %>%
  select(Respondent, PlatformWorkedWith, PlatformDesireNextYear) %>%
  mutate(
    PlatformWorkedWith = strsplit(as.character(PlatformWorkedWith), ";")
  ) %>%
  unnest(PlatformWorkedWith) %>%
  filter(!is.na(PlatformWorkedWith)) %>%
  mutate(
    PlatformDesireNextYear = strsplit(as.character(PlatformDesireNextYear), ";")
  ) %>%
  unnest(PlatformDesireNextYear) %>%
  filter(!is.na(PlatformDesireNextYear))

# Count every (used, desired) pair. The grouping that summarise() leaves
# behind is dropped explicitly before sorting by count.
df2_edges <- PlatformWorkedWith_PlatformDesireNextYear_df %>%
  group_by(PlatformWorkedWith, PlatformDesireNextYear) %>%
  summarise(n = n()) %>%
  ungroup() %>%
  arrange(desc(n))
df2_edges <- as.data.frame(df2_edges)
names(df2_edges) <- c("from", "to", "weight")

# Keep only pairs where the used and the desired platform are identical.
# filter() with a bare expression replaces the deprecated filter_().
x <- df2_edges %>%
  filter(from == to) %>%
  head(10)
# Percentage relative to all survey rows, rounded to a whole number.
x$percent <- round(x$weight / nrow(survey_results_public) * 100)

p_PlatformWorkedWith_PlatformDesireNextYear_df <- highchart() %>%
  hc_xAxis(categories = x$from) %>%
  hc_add_series(name = "Percent %", data = x$percent, colorByPoint = TRUE) %>%
  hc_title(text = "Same Platform Desire for Next Year") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())

# Side-by-side with the most-popular-platform chart built earlier.
lst <- list(
  p_PlatformWorkedWith_PlatformDesireNextYear_df,
  p_by_PlatformWorkedWith
)
hw_grid(lst, rowheight = 400)
# Frameworks respondents have worked with: top-10 and bottom-10 bar charts.
# FrameworkWorkedWith holds ";"-separated answers, so each answer is split
# into one row per (respondent, framework) before counting.
# Both charts need the same counts, so the split/count step runs only once.
framework_counts <- survey_results_public %>%
  select(Respondent, FrameworkWorkedWith) %>%
  mutate(
    FrameworkWorkedWith = strsplit(as.character(FrameworkWorkedWith), ";")
  ) %>%
  unnest(FrameworkWorkedWith) %>%
  filter(!is.na(FrameworkWorkedWith)) %>%
  group_by(FrameworkWorkedWith) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total)) %>%
  ungroup() %>%
  mutate(FrameworkWorkedWith = reorder(FrameworkWorkedWith, Total))

# Ten most-mentioned frameworks. Percent is the share of ALL survey rows
# (nrow(survey_results_public)), rounded to a whole number.
by_FrameworkWorkedWith <- framework_counts %>%
  mutate(Percent = round(Total / nrow(survey_results_public) * 100)) %>%
  head(10)
p_by_FrameworkWorkedWith <- highchart() %>%
  hc_xAxis(categories = by_FrameworkWorkedWith$FrameworkWorkedWith) %>%
  hc_add_series(
    name = "Percent %",
    data = by_FrameworkWorkedWith$Percent,
    colorByPoint = TRUE # logical flag rather than the numeric 1
  ) %>%
  hc_title(text = "Most Popular Framework") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())

# Ten least-mentioned frameworks. Percent is not rounded here
# (presumably so that tiny shares do not all collapse to 0 -- confirm).
# by_FrameworkWorkedWith is intentionally reused, so after this point it
# holds the bottom-10 table.
by_FrameworkWorkedWith <- framework_counts %>%
  mutate(Percent = (Total / nrow(survey_results_public) * 100)) %>%
  tail(10)
p2_by_FrameworkWorkedWith <- highchart() %>%
  hc_xAxis(categories = by_FrameworkWorkedWith$FrameworkWorkedWith) %>%
  hc_add_series(
    name = "Percent %",
    data = by_FrameworkWorkedWith$Percent,
    colorByPoint = TRUE
  ) %>%
  hc_title(text = "Least Popular Framework") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())

# Render both charts in one grid.
lst <- list(
  p_by_FrameworkWorkedWith,
  p2_by_FrameworkWorkedWith
)
hw_grid(lst, rowheight = 400)
# Ten frameworks respondents most often say they want to work with next year.
# The ";"-separated answers are split into one row per framework, counted,
# and expressed as a rounded percentage of all survey rows.
by_FrameworkDesireNextYear <- survey_results_public %>%
  select(Respondent, FrameworkDesireNextYear) %>%
  mutate(
    FrameworkDesireNextYear = strsplit(as.character(FrameworkDesireNextYear), ";")
  ) %>%
  unnest(FrameworkDesireNextYear) %>%
  filter(!is.na(FrameworkDesireNextYear)) %>%
  group_by(FrameworkDesireNextYear) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total)) %>%
  ungroup() %>%
  mutate(
    FrameworkDesireNextYear = reorder(FrameworkDesireNextYear, Total),
    Percent = round(Total / nrow(survey_results_public) * 100)
  ) %>%
  head(10)

# Horizontal bar chart of those ten frameworks (printed, not stored).
highchart() %>%
  hc_xAxis(categories = by_FrameworkDesireNextYear$FrameworkDesireNextYear) %>%
  hc_add_series(
    name = "Percent %",
    data = by_FrameworkDesireNextYear$Percent,
    colorByPoint = 1
  ) %>%
  hc_title(text = "Most Popular Framework Next Year Desire for Next Year") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())
# Share of respondents who already use a framework AND want to keep using it
# next year, shown next to the overall most-popular-framework chart.

# Expand both ";"-separated multi-select columns so that every row is one
# (respondent, framework used, framework desired) combination.
FrameworkWorkedWith_FrameworkDesireNextYear_df <- survey_results_public %>%
  select(Respondent, FrameworkWorkedWith, FrameworkDesireNextYear) %>%
  mutate(
    FrameworkWorkedWith = strsplit(as.character(FrameworkWorkedWith), ";")
  ) %>%
  unnest(FrameworkWorkedWith) %>%
  filter(!is.na(FrameworkWorkedWith)) %>%
  mutate(
    FrameworkDesireNextYear = strsplit(as.character(FrameworkDesireNextYear), ";")
  ) %>%
  unnest(FrameworkDesireNextYear) %>%
  filter(!is.na(FrameworkDesireNextYear))

# Count every (used, desired) pair. The grouping that summarise() leaves
# behind is dropped explicitly before sorting by count.
df2_edges <- FrameworkWorkedWith_FrameworkDesireNextYear_df %>%
  group_by(FrameworkWorkedWith, FrameworkDesireNextYear) %>%
  summarise(n = n()) %>%
  ungroup() %>%
  arrange(desc(n))
df2_edges <- as.data.frame(df2_edges)
names(df2_edges) <- c("from", "to", "weight")

# Keep only pairs where the used and the desired framework are identical.
# filter() with a bare expression replaces the deprecated filter_().
x <- df2_edges %>%
  filter(from == to) %>%
  head(10)
# Percentage relative to all survey rows, rounded to a whole number.
x$percent <- round(x$weight / nrow(survey_results_public) * 100)

p_FrameworkWorkedWith_FrameworkDesireNextYear_df <- highchart() %>%
  hc_xAxis(categories = x$from) %>%
  hc_add_series(name = "Percent %", data = x$percent, colorByPoint = TRUE) %>%
  hc_title(text = "Same Framework Desire for Next Year") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())

# Side-by-side with the most-popular-framework chart built earlier.
lst <- list(
  p_FrameworkWorkedWith_FrameworkDesireNextYear_df,
  p_by_FrameworkWorkedWith
)
hw_grid(lst, rowheight = 400)
# IDEs respondents use: top-10 and bottom-10 bar charts.
# IDE holds ";"-separated answers, so each answer is split into one row per
# (respondent, IDE) before counting.
# Both charts need the same counts, so the split/count step runs only once.
ide_counts <- survey_results_public %>%
  select(Respondent, IDE) %>%
  mutate(IDE = strsplit(as.character(IDE), ";")) %>%
  unnest(IDE) %>%
  filter(!is.na(IDE)) %>%
  group_by(IDE) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total)) %>%
  ungroup() %>%
  mutate(IDE = reorder(IDE, Total))

# Ten most-mentioned IDEs. Percent is the share of ALL survey rows
# (nrow(survey_results_public)), rounded to a whole number.
by_IDE <- ide_counts %>%
  mutate(Percent = round(Total / nrow(survey_results_public) * 100)) %>%
  head(10)
p_by_IDE <- highchart() %>%
  hc_xAxis(categories = by_IDE$IDE) %>%
  hc_add_series(
    name = "Percent %",
    data = by_IDE$Percent,
    colorByPoint = TRUE # logical flag rather than the numeric 1
  ) %>%
  hc_title(text = "Most Popular IDE") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())

# Ten least-mentioned IDEs. Percent is not rounded here
# (presumably so that tiny shares do not all collapse to 0 -- confirm).
# by_IDE is intentionally reused, so after this point it holds the
# bottom-10 table.
by_IDE <- ide_counts %>%
  mutate(Percent = (Total / nrow(survey_results_public) * 100)) %>%
  tail(10)
p2_by_IDE <- highchart() %>%
  hc_xAxis(categories = by_IDE$IDE) %>%
  hc_add_series(
    name = "Percent %",
    data = by_IDE$Percent,
    colorByPoint = TRUE
  ) %>%
  hc_title(text = "Least Popular IDE") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())

# Render both charts in one grid.
lst <- list(
  p_by_IDE,
  p2_by_IDE
)
hw_grid(lst, rowheight = 400)
# IDE co-usage network: nodes are IDEs, edges link two IDEs that were named
# by the same respondent. Rendered with visNetwork.
df <- survey_results_public %>% select(Respondent,IDE)
# One row per (Respondent, IDE): the ";"-separated answer is split and unnested.
df2 <- df %>%
mutate(IDE = strsplit(as.character(IDE), ";")) %>%
unnest(IDE)
# For each respondent with at least two rows, combn(..., 2) enumerates every
# pair of their IDEs; the transposed matrix becomes a two-column data frame
# (X1, X2) that is renamed to source/target. Respondent is then dropped.
df2_edges <- df2 %>% group_by(Respondent) %>%
filter(n()>=2) %>%
do(data.frame(t(combn((.)[["IDE"]], 2)), stringsAsFactors=FALSE)) %>% ungroup() %>%
rename(source = X1, target = X2) %>%
select(-Respondent)
# Count how often each (source, target) pair occurs and rename the columns
# to from / to / weight.
df2_edges <- df2_edges %>% group_by(source,target) %>% summarise(weight=n())
names(df2_edges) <- c("from","to","weight")
# Scale the pair counts down by a fixed divisor (800) before deriving widths.
df2_edges$weight <- df2_edges$weight/800
df2_edges$width <- 1+df2_edges$weight # line width
#df2_edges$color <- "gray" # line color
#df2_edges$arrows <- "middle" # arrows: 'from', 'to', or 'middle'
df2_edges$smooth <- FALSE # should the edges be curved?
# NOTE(review): this per-edge column is FALSE while visEdges() below passes
# shadow = TRUE -- confirm which setting visNetwork applies.
df2_edges$shadow <- FALSE # edge shadow
# Node table: one row per non-missing IDE; the mention count divided by 500
# is used as the node size. Columns are renamed to id / size.
df2_nodes <- df2 %>% filter(!is.na(IDE)) %>% group_by(IDE) %>% summarise(n = n()/500) %>% arrange(desc(n))
names(df2_nodes) <- c("id","size")
# One colour per node from distinctColorPalette().
n <- nrow(df2_nodes)
palette <- distinctColorPalette(n)
df2_nodes$shape <- "dot"
df2_nodes$shadow <- TRUE # Nodes will drop shadow
df2_nodes$title <- df2_nodes$id # Text on click
df2_nodes$label <- df2_nodes$id # Node label
# Self-assignment: leaves the size column unchanged.
df2_nodes$size <- df2_nodes$size # Node size
df2_nodes$borderWidth <- 2 # Node border width
# palette[] selects the whole vector, i.e. all n colours in order.
df2_nodes$color.background <- palette[]
df2_nodes$color.border <- "black"
df2_nodes$color.highlight.background <- "orange"
df2_nodes$color.highlight.border <- "darkred"
df2_nodes$font.size <- 40
# Draw the network using the igraph "layout_with_lgl" layout, with gray
# edges that highlight in orange.
visNetwork(df2_nodes, df2_edges, height = "500px", width = "100%") %>% visIgraphLayout(layout = "layout_with_lgl") %>%
visEdges(shadow = TRUE,
color = list(color = "gray", highlight = "orange"))
# Share of respondents per primary operating system.
# Rows with a missing OperatingSystem are dropped before counting, but
# Percent is still computed against all survey rows.
by_OperatingSystem <- survey_results_public %>%
  filter(!is.na(OperatingSystem)) %>%
  group_by(OperatingSystem) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total)) %>%
  ungroup() %>%
  # Reorder the OperatingSystem column itself. Writing the result to a column
  # named "Student" was a copy-paste slip that only added a stray column.
  mutate(OperatingSystem = reorder(OperatingSystem, Total)) %>%
  mutate(Percent = (Total / nrow(survey_results_public) * 100)) %>%
  head(10)

# Horizontal bar chart (printed, not stored).
highchart() %>%
  hc_xAxis(categories = by_OperatingSystem$OperatingSystem) %>%
  hc_add_series(
    name = "Percent %",
    data = by_OperatingSystem$Percent,
    colorByPoint = TRUE # logical flag rather than the numeric 1
  ) %>%
  hc_title(text = "Primary Operating System") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())
# Share of respondents per number of monitors at their workstation.
# Percent is relative to the respondents who answered (sum of the counts).
by_NumberMonitors <- survey_results_public %>%
  filter(!is.na(NumberMonitors)) %>%
  group_by(NumberMonitors) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total)) %>%
  ungroup() %>%
  # Reorder the NumberMonitors column itself. Writing the result to a column
  # named "Student" was a copy-paste slip that only added a stray column.
  mutate(NumberMonitors = reorder(NumberMonitors, Total)) %>%
  mutate(Percent = (Total / sum(Total) * 100)) %>%
  head(10)

# Horizontal bar chart (printed, not stored).
highchart() %>%
  hc_xAxis(categories = by_NumberMonitors$NumberMonitors) %>%
  hc_add_series(
    name = "Percent %",
    data = by_NumberMonitors$Percent,
    colorByPoint = TRUE # logical flag rather than the numeric 1
  ) %>%
  hc_title(text = "Number of Monitors at workstation") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())
# World map coloured by the median salary reported in each country.
# NOTE(review): Salary is coerced with as.numeric(); values that are not
# plain numbers become NA and are dropped -- confirm the column's format
# and currency against the survey schema.
by_country_salary <- survey_results_public %>%
  select(Country, Salary) %>%
  mutate(Salary = as.numeric(Salary)) %>%
  filter(!is.na(Country)) %>%
  filter(!is.na(Salary)) %>%
  group_by(Country) %>%
  # The column keeps its historical name AvgSalary (the tooltip refers to
  # it), but the statistic is the median.
  summarize(AvgSalary = median(Salary, na.rm = TRUE))
data(worldgeojson, package = "highcharter")
# Translate country names to ISO3 codes, the key used to join the map data.
code <- countrycode(by_country_salary$Country, "country.name", "iso3c")
by_country_salary$iso3 <- code
by_country_salary$AvgSalary <- round(by_country_salary$AvgSalary)

# The title now says "Median" so that it agrees with the computed statistic
# and with the tooltip text.
highchart() %>%
  hc_add_series_map(
    worldgeojson, by_country_salary,
    value = "AvgSalary", joinBy = "iso3", colorByPoint = TRUE
  ) %>%
  hc_colorAxis(stops = color_stops()) %>%
  hc_legend(enabled = TRUE) %>%
  hc_mapNavigation(enabled = TRUE) %>%
  hc_title(text = "Median Salary by Country") %>%
  hc_tooltip(
    useHTML = TRUE, headerFormat = "",
    pointFormat = "Country: {point.Country} Median Salary: ${point.AvgSalary}"
  ) %>%
  hc_add_theme(hc_theme_google())
# Motion (animated bubble) chart: median salary against years of
# professional coding experience, one bubble series entry per developer type.

# One row per (respondent, developer type); YearsCodingNum is the number
# parsed out of the YearsCodingProf text.
motion_df <- survey_results_public %>%
  select(DevType, YearsCodingProf, Salary) %>%
  mutate(
    YearsCodingNum = parse_number(YearsCodingProf),
    DevType = str_split(DevType, pattern = ";"),
    Salary = as.numeric(Salary)
  ) %>%
  unnest(DevType) %>%
  filter(!is.na(DevType)) %>%
  filter(!is.na(YearsCodingNum))

# Median salary for every (developer type, years of experience) pair.
motion_df2 <- motion_df %>%
  filter(!is.na(Salary)) %>%
  select(DevType, YearsCodingNum, Salary) %>%
  filter(!is.na(DevType)) %>%
  group_by(DevType, YearsCodingNum) %>%
  summarize(AvgSalary = median(Salary, na.rm = TRUE))
#motion_df$z <- motion_df$AvgSalary

# Starting state of the bubbles: x/y from the medians, constant size z = 100,
# and one palette colour per developer type.
data_strt2 <- motion_df2 %>%
  mutate(x = YearsCodingNum, y = AvgSalary, z = 100)
data_strt2$color <- distinctColorPalette(
  length(unique(motion_df2$DevType))
)[as.numeric(as.factor(motion_df2$DevType))]

# Animation frames: for each developer type a list of (x, y, z) points,
# where z = n is the number of motion_df rows for that developer type.
data_seqc2 <- motion_df %>%
  arrange(DevType, YearsCodingNum) %>%
  group_by(DevType) %>%
  summarise(n = n()) %>%
  right_join(motion_df2, by = "DevType") %>%
  group_by(DevType) %>%
  do(sequence = list_parse(select(., x = YearsCodingNum, y = AvgSalary, z = n)))

# Natural join on the columns both tables share.
data2 <- left_join(data_strt2, data_seqc2)

highchart() %>%
  hc_add_series(
    data = data2, type = "bubble",
    minSize = 0, maxSize = 30,
    dataLabels = list(enabled = TRUE, format = "{point.DevType}")
  ) %>%
  hc_motion(
    enabled = TRUE, series = 0,
    labels = unique(motion_df2$YearsCodingNum),
    loop = TRUE,
    updateInterval = 1000, magnet = list(step = 1)
  ) %>%
  hc_plotOptions(series = list(showInLegend = FALSE)) %>%
  hc_xAxis(min = 0, max = 30, title = list(text = "Year Of Exp")) %>%
  hc_yAxis(min = 500, max = 200000, title = list(text = "Median Salary (USD)")) %>%
  hc_title(text = "Motion Plot of Devtype vs Salary vs Year of Exp") %>%
  hc_tooltip(
    useHTML = TRUE, headerFormat = "",
    pointFormat = "{point.DevType} Year Of Exp: {point.x}y Median Salary: ${point.y} No Of Response : {point.z}"
  ) %>%
  hc_add_theme(hc_theme_google())
# Ten best-paid developer types by median salary, computed three times:
# for all respondents, for India, and for the United States.

# All respondents.
global_salary <- survey_results_public %>%
  select(DevType, Salary) %>%
  mutate(
    DevType = str_split(DevType, pattern = ";"),
    Salary = as.numeric(Salary)
  ) %>%
  unnest(DevType) %>%
  filter(!is.na(Salary), !is.na(DevType)) %>%
  group_by(DevType) %>%
  summarize(AvgSalary = median(Salary, na.rm = TRUE)) %>%
  arrange(desc(AvgSalary)) %>%
  head(10)

# Respondents whose Country is "India".
india_salary <- survey_results_public %>%
  select(DevType, Salary, Country) %>%
  mutate(
    DevType = str_split(DevType, pattern = ";"),
    Salary = as.numeric(Salary)
  ) %>%
  unnest(DevType) %>%
  filter(!is.na(Salary), Country == "India", !is.na(DevType)) %>%
  group_by(DevType) %>%
  summarize(AvgSalary = median(Salary, na.rm = TRUE)) %>%
  arrange(desc(AvgSalary)) %>%
  head(10)

# Respondents whose Country is "United States".
usa_salary <- survey_results_public %>%
  select(DevType, Salary, Country) %>%
  mutate(
    DevType = str_split(DevType, pattern = ";"),
    Salary = as.numeric(Salary)
  ) %>%
  unnest(DevType) %>%
  filter(!is.na(Salary), Country == "United States", !is.na(DevType)) %>%
  group_by(DevType) %>%
  summarize(AvgSalary = median(Salary, na.rm = TRUE)) %>%
  arrange(desc(AvgSalary)) %>%
  head(10)

# One horizontal bar chart per table.
p1 <- highchart() %>%
  hc_xAxis(categories = global_salary$DevType) %>%
  hc_add_series(
    name = "Median Salary $",
    data = global_salary$AvgSalary,
    colorByPoint = 1
  ) %>%
  hc_title(text = "Global Salary by Developer Type") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())
p2 <- highchart() %>%
  hc_xAxis(categories = india_salary$DevType) %>%
  hc_add_series(
    name = "Median Salary $",
    data = india_salary$AvgSalary,
    colorByPoint = 1
  ) %>%
  hc_title(text = "India Salary by Developer Type") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())
p3 <- highchart() %>%
  hc_xAxis(categories = usa_salary$DevType) %>%
  hc_add_series(
    name = "Median Salary $",
    data = usa_salary$AvgSalary,
    colorByPoint = 1
  ) %>%
  hc_title(text = "USA Salary by Developer Type") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())

# Show the three charts in one grid.
lst <- list(p1, p2, p3)
hw_grid(lst, rowheight = 400)
# Motion (animated bubble) chart: median salary against years of
# professional coding experience, one bubble series entry per language.

# One row per (respondent, language); YearsCodingNum is the number parsed
# out of the YearsCodingProf text.
motion_df <- survey_results_public %>%
  select(LanguageWorkedWith, YearsCodingProf, Salary) %>%
  mutate(
    YearsCodingNum = parse_number(YearsCodingProf),
    LanguageWorkedWith = str_split(LanguageWorkedWith, pattern = ";"),
    Salary = as.numeric(Salary)
  ) %>%
  unnest(LanguageWorkedWith) %>%
  filter(!is.na(LanguageWorkedWith)) %>%
  filter(!is.na(YearsCodingNum))

# Median salary for every (language, years of experience) pair.
motion_df2 <- motion_df %>%
  filter(!is.na(Salary)) %>%
  select(LanguageWorkedWith, YearsCodingNum, Salary) %>%
  filter(!is.na(LanguageWorkedWith)) %>%
  group_by(LanguageWorkedWith, YearsCodingNum) %>%
  summarize(AvgSalary = median(Salary, na.rm = TRUE))
#motion_df$z <- motion_df$AvgSalary

# Starting state of the bubbles: x/y from the medians, constant size z = 100,
# and one palette colour per language.
data_strt2 <- motion_df2 %>%
  mutate(x = YearsCodingNum, y = AvgSalary, z = 100)
data_strt2$color <- distinctColorPalette(
  length(unique(motion_df2$LanguageWorkedWith))
)[as.numeric(as.factor(motion_df2$LanguageWorkedWith))]

# Animation frames: for each language a list of (x, y, z) points, where
# z = n is the number of motion_df rows for that language.
data_seqc2 <- motion_df %>%
  arrange(LanguageWorkedWith, YearsCodingNum) %>%
  group_by(LanguageWorkedWith) %>%
  summarise(n = n()) %>%
  right_join(motion_df2, by = "LanguageWorkedWith") %>%
  group_by(LanguageWorkedWith) %>%
  do(sequence = list_parse(select(., x = YearsCodingNum, y = AvgSalary, z = n)))

# Natural join on the columns both tables share.
data2 <- left_join(data_strt2, data_seqc2)

highchart() %>%
  hc_add_series(
    data = data2, type = "bubble",
    minSize = 0, maxSize = 30,
    dataLabels = list(enabled = TRUE, format = "{point.LanguageWorkedWith}")
  ) %>%
  hc_motion(
    enabled = TRUE, series = 0,
    labels = unique(motion_df2$YearsCodingNum),
    loop = TRUE,
    updateInterval = 1000, magnet = list(step = 1)
  ) %>%
  hc_plotOptions(series = list(showInLegend = FALSE)) %>%
  hc_xAxis(min = 0, max = 30, title = list(text = "Year Of Exp")) %>%
  hc_yAxis(min = 500, max = 200000, title = list(text = "Median Salary (USD)")) %>%
  hc_title(text = "Motion Plot of Programming Language vs Salary vs Year of Exp") %>%
  hc_tooltip(
    useHTML = TRUE, headerFormat = "",
    pointFormat = "{point.LanguageWorkedWith} Year Of Exp: {point.x}y Median Salary: ${point.y} No Of Response : {point.z}"
  ) %>%
  hc_add_theme(hc_theme_google())
# Median salary by years of professional experience, one line per gender.
# Gender is a ";"-separated multi-select, so a respondent can contribute to
# several gender groups.
by_salary_gender <- survey_results_public %>%
  select(Gender, Salary, YearsCodingProf) %>%
  mutate(
    YearsCodingNum = parse_number(YearsCodingProf),
    Gender = str_split(Gender, pattern = ";"),
    Salary = as.numeric(Salary)
  ) %>%
  unnest(Gender) %>%
  filter(!is.na(Salary), !is.na(Gender)) %>%
  select(Gender, YearsCodingNum, Salary) %>%
  group_by(Gender, YearsCodingNum) %>%
  summarize(AvgSalary = median(Salary, na.rm = TRUE))

# Line chart (printed, not stored); axes are clipped to 0-30 years and
# 500-200000 salary.
hchart(
  by_salary_gender, "line",
  hcaes(x = YearsCodingNum, y = AvgSalary, group = Gender)
) %>%
  hc_xAxis(min = 0, max = 30, title = list(text = "Year Of Exp")) %>%
  hc_yAxis(min = 500, max = 200000, title = list(text = "Median Salary (USD)")) %>%
  hc_add_theme(hc_theme_google())
# Median salary by years of professional experience, one spline per
# developer type (DevType is a ";"-separated multi-select).
by_salary_devtype <- survey_results_public %>%
  select(DevType, Salary, YearsCodingProf) %>%
  mutate(
    YearsCodingNum = parse_number(YearsCodingProf),
    DevType = str_split(DevType, pattern = ";"),
    Salary = as.numeric(Salary)
  ) %>%
  unnest(DevType) %>%
  filter(!is.na(Salary), !is.na(DevType)) %>%
  select(DevType, YearsCodingNum, Salary) %>%
  group_by(DevType, YearsCodingNum) %>%
  summarize(AvgSalary = median(Salary, na.rm = TRUE))

# Spline chart with a vertical legend on the left and a sorted table tooltip.
hchart(
  by_salary_devtype, "spline",
  hcaes(x = YearsCodingNum, y = AvgSalary, group = DevType)
) %>%
  hc_xAxis(min = 0, max = 30, title = list(text = "Year Of Exp")) %>%
  hc_yAxis(min = 500, max = 180000, title = list(text = "Median Salary (USD)")) %>%
  hc_legend(align = "left", layout = "vertical", verticalAlign = "top") %>%
  hc_tooltip(sort = TRUE, table = TRUE) %>%
  hc_title(text = "Developer Type vs Median Salary by Year of Exp") %>%
  hc_add_theme(hc_theme_google())
# Median salary by years of professional experience, one spline per
# programming language (LanguageWorkedWith is a ";"-separated multi-select).
by_salary_LanguageWorkedWith <- survey_results_public %>%
  select(LanguageWorkedWith, Salary, YearsCodingProf) %>%
  mutate(
    YearsCodingNum = parse_number(YearsCodingProf),
    LanguageWorkedWith = str_split(LanguageWorkedWith, pattern = ";"),
    Salary = as.numeric(Salary)
  ) %>%
  unnest(LanguageWorkedWith) %>%
  filter(!is.na(Salary), !is.na(LanguageWorkedWith)) %>%
  select(LanguageWorkedWith, YearsCodingNum, Salary) %>%
  group_by(LanguageWorkedWith, YearsCodingNum) %>%
  summarize(AvgSalary = median(Salary, na.rm = TRUE))

# Spline chart with a vertical legend on the left and a sorted table tooltip.
hchart(
  by_salary_LanguageWorkedWith, "spline",
  hcaes(x = YearsCodingNum, y = AvgSalary, group = LanguageWorkedWith)
) %>%
  hc_xAxis(min = 0, max = 30, title = list(text = "Year Of Exp")) %>%
  hc_yAxis(min = 500, max = 180000, title = list(text = "Median Salary (USD)")) %>%
  hc_legend(align = "left", layout = "vertical", verticalAlign = "top") %>%
  hc_tooltip(sort = TRUE, table = TRUE) %>%
  hc_title(text = "Programming Language vs Median Salary by Year of Exp") %>%
  hc_add_theme(hc_theme_google())
# Distribution of answers to the AIDangerous question, as a rounded
# percentage of the respondents who answered it.
by_AIDangerous <- survey_results_public %>%
  filter(!is.na(AIDangerous)) %>%
  group_by(AIDangerous) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total)) %>%
  ungroup() %>%
  mutate(
    AIDangerous = reorder(AIDangerous, Total),
    Percent = round(Total / sum(Total) * 100)
  )

# Horizontal bar chart (printed, not stored).
highchart() %>%
  hc_xAxis(categories = by_AIDangerous$AIDangerous) %>%
  hc_add_series(
    name = "Percent %",
    data = by_AIDangerous$Percent,
    colorByPoint = 1
  ) %>%
  hc_title(text = "Most Dangerous Aspect of Increasingly Advanced AI Technology") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())
# Drill-down column chart: the top level counts respondents per developer
# type; drilling into a type shows that group's AIDangerous answers.

# One row per (respondent, developer type).
survey_results_public2 <- survey_results_public %>%
  mutate(DevType = strsplit(as.character(DevType), ";")) %>%
  unnest(DevType)

# Top-level points: name = developer type, y = row count, and `drilldown`
# = the lower-cased type name, which matches the `id` built for df2 below.
df1 <- survey_results_public2 %>%
  filter(!is.na(DevType)) %>%
  group_by(name = DevType, drilldown = tolower(DevType)) %>%
  summarise(y = n()) %>%
  arrange(desc(y))

# Drill-down series: one row per developer type whose `data` column is a
# list of (answer, count, drilldown) points sorted by descending count.
df2 <- survey_results_public2 %>%
  filter(!is.na(DevType), !is.na(AIDangerous)) %>%
  group_by(DevType, AIDangerous) %>%
  dplyr::mutate(y = n(), colorByPoint = 1) %>%
  arrange(desc(y)) %>%
  group_by(name = DevType, id = tolower(DevType), colorByPoint) %>%
  do(data = list_parse(
    mutate(
      .,
      name = AIDangerous,
      drilldown = tolower(paste(DevType, AIDangerous, sep = ": "))
    ) %>%
      group_by(name, drilldown) %>%
      summarise(y = n()) %>%
      dplyr::select(name, y, drilldown) %>%
      arrange(desc(y))
  ))

highchart() %>%
  hc_chart(type = "column") %>%
  hc_title(text = "Developer wise Opinion on Dangerous Aspect of AI") %>%
  hc_add_series(data = df1, name = "Developer Type", colorByPoint = 1) %>%
  hc_legend(enabled = FALSE) %>%
  hc_xAxis(type = "category") %>%
  hc_drilldown(
    allowPointDrilldown = TRUE,
    series = list_parse(df2)
  ) %>%
  hc_add_theme(hc_theme_google())
# Distribution of answers to the AIInteresting question, as a rounded
# percentage of the respondents who answered it.
by_AIInteresting <- survey_results_public %>%
  filter(!is.na(AIInteresting)) %>%
  group_by(AIInteresting) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total)) %>%
  ungroup() %>%
  mutate(AIInteresting = reorder(AIInteresting, Total)) %>%
  mutate(Percent = round(Total / sum(Total) * 100))

# Horizontal bar chart (printed, not stored).
# The series must come from by_AIInteresting; it previously plotted
# by_AIDangerous$Percent under the AIInteresting category labels.
highchart() %>%
  hc_xAxis(categories = by_AIInteresting$AIInteresting) %>%
  hc_add_series(
    name = "Percent %",
    data = by_AIInteresting$Percent,
    colorByPoint = TRUE # logical flag rather than the numeric 1
  ) %>%
  hc_title(text = "Most Exciting Aspect of Increasingly Advanced AI Technology") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())
# Drill-down column chart: the top level counts respondents per developer
# type; drilling into a type shows that group's AIInteresting answers.

# One row per (respondent, developer type).
survey_results_public2 <- survey_results_public %>%
  mutate(DevType = strsplit(as.character(DevType), ";")) %>%
  unnest(DevType)

# Top-level points: name = developer type, y = row count, and `drilldown`
# = the lower-cased type name, which matches the `id` built for df2 below.
df1 <- survey_results_public2 %>%
  filter(!is.na(DevType)) %>%
  group_by(name = DevType, drilldown = tolower(DevType)) %>%
  summarise(y = n()) %>%
  arrange(desc(y))

# Drill-down series: one row per developer type whose `data` column is a
# list of (answer, count, drilldown) points sorted by descending count.
df2 <- survey_results_public2 %>%
  filter(!is.na(DevType), !is.na(AIInteresting)) %>%
  group_by(DevType, AIInteresting) %>%
  dplyr::mutate(y = n(), colorByPoint = 1) %>%
  arrange(desc(y)) %>%
  group_by(name = DevType, id = tolower(DevType), colorByPoint) %>%
  do(data = list_parse(
    mutate(
      .,
      name = AIInteresting,
      drilldown = tolower(paste(DevType, AIInteresting, sep = ": "))
    ) %>%
      group_by(name, drilldown) %>%
      summarise(y = n()) %>%
      dplyr::select(name, y, drilldown) %>%
      arrange(desc(y))
  ))

highchart() %>%
  hc_chart(type = "column") %>%
  hc_title(text = "Developer wise Opinion on Exciting Aspect of AI") %>%
  hc_add_series(data = df1, name = "Developer Type", colorByPoint = 1) %>%
  hc_legend(enabled = FALSE) %>%
  hc_xAxis(type = "category") %>%
  hc_drilldown(
    allowPointDrilldown = TRUE,
    series = list_parse(df2)
  ) %>%
  hc_add_theme(hc_theme_google())
# Distribution of answers to the AIResponsible question, as a rounded
# percentage of the respondents who answered it.
by_AIResponsible <- survey_results_public %>%
  filter(!is.na(AIResponsible)) %>%
  group_by(AIResponsible) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total)) %>%
  ungroup() %>%
  mutate(
    AIResponsible = reorder(AIResponsible, Total),
    Percent = round(Total / sum(Total) * 100)
  )

# Horizontal bar chart (printed, not stored).
highchart() %>%
  hc_xAxis(categories = by_AIResponsible$AIResponsible) %>%
  hc_add_series(
    name = "Percent %",
    data = by_AIResponsible$Percent,
    colorByPoint = 1
  ) %>%
  hc_title(text = "Ramifications of Increasingly Advanced AI Technology") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())
# Distribution of answers to the AIFuture question, as a rounded
# percentage of the respondents who answered it.
by_AIFuture <- survey_results_public %>%
  filter(!is.na(AIFuture)) %>%
  group_by(AIFuture) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total)) %>%
  ungroup() %>%
  mutate(
    AIFuture = reorder(AIFuture, Total),
    Percent = round(Total / sum(Total) * 100)
  )

# Horizontal bar chart (printed, not stored).
highchart() %>%
  hc_xAxis(categories = by_AIFuture$AIFuture) %>%
  hc_add_series(
    name = "Percent %",
    data = by_AIFuture$Percent,
    colorByPoint = 1
  ) %>%
  hc_title(text = "Take on the Future of Artificial Intelligence") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())
# Distribution of employment status, as an (unrounded) percentage of the
# respondents who answered; at most the ten largest categories are kept.
by_Employment <- survey_results_public %>%
  filter(!is.na(Employment)) %>%
  group_by(Employment) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total)) %>%
  ungroup() %>%
  mutate(
    Employment = reorder(Employment, Total),
    Percent = (Total / sum(Total) * 100)
  ) %>%
  head(10)

# Horizontal bar chart (printed, not stored).
highchart() %>%
  hc_xAxis(categories = by_Employment$Employment) %>%
  hc_add_series(
    name = "Percent %",
    data = by_Employment$Percent,
    colorByPoint = 1
  ) %>%
  hc_title(text = "Employment Status") %>%
  hc_chart(
    type = "bar",
    options3d = list(enabled = TRUE, beta = 1, alpha = 1)
  ) %>%
  hc_add_theme(hc_theme_google())
# Drill-down column chart: the top level counts respondents per employment
# status; drilling into a status shows its ten most frequent countries.

# Top-level points: name = employment status, y = row count, and `drilldown`
# = the lower-cased status, which matches the `id` built for df2 below.
df1 <- survey_results_public %>%
  filter(!is.na(Employment)) %>%
  group_by(name = Employment, drilldown = tolower(Employment)) %>%
  summarise(y = n()) %>%
  arrange(desc(y)) %>%
  head(10)

# Drill-down series: one row per employment status whose `data` column is a
# list of (country, count, drilldown) points sorted by descending count.
# head(10) is applied to the parsed list, keeping its first ten points.
df2 <- survey_results_public %>%
  filter(!is.na(Employment), !is.na(Country)) %>%
  group_by(Employment, Country) %>%
  dplyr::mutate(y = n(), colorByPoint = 1) %>%
  arrange(desc(y)) %>%
  group_by(name = Employment, id = tolower(Employment), colorByPoint) %>%
  do(data = list_parse(
    mutate(
      .,
      name = Country,
      drilldown = tolower(paste(Employment, Country, sep = ": "))
    ) %>%
      group_by(name, drilldown) %>%
      summarise(y = n()) %>%
      dplyr::select(name, y, drilldown) %>%
      arrange(desc(y))
  ) %>% head(10))

highchart() %>%
  hc_chart(type = "column") %>%
  hc_title(text = "Employment Status By Country") %>%
  hc_add_series(data = df1, name = "Employment Status", colorByPoint = 1) %>%
  hc_legend(enabled = FALSE) %>%
  hc_xAxis(type = "category") %>%
  hc_yAxis(title = list(text = "Total Response")) %>%
  hc_drilldown(
    allowPointDrilldown = TRUE,
    series = list_parse(df2)
  ) %>%
  hc_add_theme(hc_theme_google())
Constructive criticism is welcome. If there are any suggestions or changes you would like to see in the Kernel, please let us know.